import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.sparse import coo_matrix
from scipy.spatial.distance import cdist
from sklearn.utils.graph import graph_shortest_path
from sklearn.neighbors import kneighbors_graph
import networkx as nx
from IPython.core.display import display, HTML
import warnings
import scipy.io
#warnings.filterwarnings('ignore')
#display(HTML("<style>.container { width:90% !important; }</style>"))
#%load_ext autoreload
#%autoreload 2
images = scipy.io.loadmat('isomap.mat')['images']
images.shape
images=images.T
#Compute the pairwise distance between images
distances = cdist(XA=images, XB=images, metric='euclidean')
distances.max()
### desigining the threshold as per the required minimum number of neighbors
def get_threshold(distances, num_nn):
max_threshold = 0
for i in range(distances.shape[0]):
threshold = sorted(distances[i])[num_nn]
if threshold > max_threshold:
max_threshold = threshold
return max_threshold
threshold = get_threshold(distances, 50)
threshold
# setting the larger distances than the epsioln to Infinity
distances[distances>threshold] = np.inf
### sanity check for minimum number of neighbors
min(np.count_nonzero(~np.isinf(distances), axis=1))
####Adjaceny Matrix from distances matrix, just for the visulaization purpose, networkx
A = distances
A[A>threshold] = 0.0
plt.figure(figsize=(10,10))
plt.imshow(A)
import networkx as nx
import matplotlib.pyplot as plt
plt.figure(figsize=(8,8))
G = nx.from_numpy_matrix(np.array(A))
graph_pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, graph_pos, node_size=180, node_color='blue', alpha=0.3)
nx.draw_networkx_edges(G, graph_pos, edge_color='grey' )
nx.draw_networkx_labels(G, graph_pos, font_size=6, font_family='sans-serif')
plt.show()
#471, 595, 95, 622, 677,575, 396,305, 173,188
sample_nodes=[471, 595, 95, 622, 677,575, 396,305, 173,188]
f, axarr=plt.subplots(1,10,figsize=(20,30))
for i in range(10):
#plt.subplots(1,i,figsize=(3,4))
#axarr[i].imshow(images[:,sample_nodes[i]].reshape((64, 64)),cmap = 'gray')
node_temp = images[sample_nodes[i],:].reshape((64, 64)).T
axarr[i].imshow(node_temp ,cmap = 'gray')
axarr[i].set_title("Nodenumber=%i" %(sample_nodes[i]),fontsize=8 )
# Compute the shortest path graph D based on the nearest neighbors graph A:
D_e = graph_shortest_path(distances, directed=False)
# Compute the centering matrix H:
m=D_e.shape[0]
I = np.eye(N=D_e.shape[1])
H = I - 1/m * np.ones(I.shape)
# Compute the C matrix:
C_e = -0.5 * H @ D_e**2 @ H
E_e1, U_e1 = np.linalg.eigh(C_e)
idc1 = np.argsort(-E_e1)
E_e1 = E_e1[idc1]
U_e1 = U_e1[:,idc1]
U_e1[:,0].T@U_e1[:,0]
proj_e1 =-U_e1[:,:2]* (E_e1[:2]**0.5)
proj_e1.shape
# Plot our isomap
from matplotlib import offsetbox
def plot_components(proj, images=images, ax=None,
thumb_frac=0.05, cmap='gray',
xlabel="left-right facing",
ylabel="up-down facing"):
fig = plt.figure(figsize=(40,15))
ax = fig.add_subplot(1, 2, 1)
plt.xlabel("left-right facing")
plt.ylabel("up-down facing", labelpad=15).set_rotation(0)
ax.plot(proj[:, 0], proj[:, 1], '.k')
m = proj[:, 0].shape[0]
ax = fig.add_subplot(1, 2, 2)
plt.xlabel("left-right facing")
plt.ylabel("up-down facing", labelpad=15).set_rotation(0)
ax.plot(proj[:, 0], proj[:, 1], '.k')
m = proj[:, 0].shape[0]
min_dist_2 = (thumb_frac * max(proj.max(0) - proj.min(0))) * 2
shown_images = np.array([2 * proj.max(0)])
for i in range(m):
dist = np.sum((proj[i] - shown_images) ** 2, 1)
if np.min(dist) < min_dist_2:
# don't show points that are too close
continue
shown_images = np.vstack([shown_images, proj[i]])
imagebox = offsetbox.AnnotationBbox(offsetbox.OffsetImage(images[i].reshape((64,64)).T, cmap=cmap,zoom=0.75),proj[i])
ax.add_artist(imagebox)
print('######### e-Isomap, Euclidean distance, with (right) & without(left) images overlain #######')
#plot_components(proj_e1, images )
plot_components(proj_e1, images )
#Compute the pairwise distance between images
distances = cdist(XA=images, XB=images, metric='euclidean')
##### now with the k-ISOMAP approach, every node will have just and only K neighbors
A = np.zeros_like(distances)
num_neighbors=50
for i in range(distances.shape[0]):
neighbors = np.argsort(distances[i,:])[:num_neighbors+1]
A[i,neighbors] = distances[i,neighbors]
A[neighbors,i] = distances[neighbors,i]
np.mean(A)
#import networkx as nx
#import matplotlib.pyplot as plt
plt.figure(figsize=(8,8))
G = nx.from_numpy_matrix(np.array(A))
graph_pos = nx.spring_layout(G)
nx.draw_networkx_nodes(G, graph_pos, node_size=180, node_color='blue', alpha=0.3)
nx.draw_networkx_edges(G, graph_pos, edge_color='grey' )
nx.draw_networkx_labels(G, graph_pos, font_size=6, font_family='sans-serif')
plt.show()
D = graph_shortest_path(A)
#C = -1/(2*m) * H @ D**2 @ H
C = -1/(2) * H @ D**2 @ H
vals, vecs = np.linalg.eigh(C)
sorted_vals = vals[np.argsort(-vals)]
sorted_vecs = vecs[:,np.argsort(-vals)]
# Normalize the leading eigenvectors:
proj_k1 = -sorted_vecs[:,:2] * np.sqrt(sorted_vals[:2])
print('######### k-Isomap, Euclidean distance #######')
#plot_components(proj_k1 , images=images)
plot_components(proj_k1 )
distances = cdist(XA=images, XB=images, metric='cityblock')
A = np.zeros_like(distances)
num_neighbors=50
for i in range(distances.shape[0]):
neighbors = np.argsort(distances[i,:])[:num_neighbors+1]
A[i,neighbors] = distances[i,neighbors]
A[neighbors,i] = distances[neighbors,i]
D = graph_shortest_path(A)
C = -1/(2*m) * H @ D**2 @ H
vals, vecs = np.linalg.eigh(C)
sorted_vals = vals[np.argsort(-vals)]
sorted_vecs = vecs[:,np.argsort(-vals)]
proj_k_m = -sorted_vecs[:,:2] * np.sqrt(sorted_vals[:2])
print('######### k-Isomap, Manhattan distance#######')
plot_components(proj_k_m , images=images)
mean_images= np.mean(images, axis=0)
mean_images.shape
centered_images = images-mean_images
centered_images.shape
C_pca = np.cov(centered_images, rowvar=False)
E_pca, U_pca = np.linalg.eigh(C_pca)
idc1 = np.argsort(-E_pca)
E_pca = E_pca[idc1]
U_pca = U_pca[:,idc1] # sort columns
proj_pca= -centered_images @ U_pca[:,:2]
print('#########PCA on images#######')
plot_components(proj_pca , images=images)
where $x_1$ and $x_2$ are the two dimensions respectively $$K(x) = \frac{1}{\sqrt {2\pi}} e^{-\frac{(x_1^2 + x_2^2)}{2}}$$ Recall in this case, the kernel density estimator (KDE) for a density is given by
$$p(x) = \frac 1 m \sum_{i=1}^m \frac 1 h K\left( \frac{x^i - x}{h} \right)$$
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
plt.style.use('seaborn-white')
df = pd.read_csv("n90pol.csv")
df.head()
len(df)
# 1-D histogram:
SMALL_SIZE = 4
MEDIUM_SIZE = 6
BIGGER_SIZE = 15
plt.figure(figsize=(10, 6))
plt.rc('font', size=BIGGER_SIZE)
plt.rc('axes', titlesize=BIGGER_SIZE)
plt.rc('axes', labelsize=BIGGER_SIZE)
plt.rc('xtick', labelsize=BIGGER_SIZE)
plt.hist(df['amygdala'], edgecolor='black', bins=8)
# Add title and axis names
plt.title("Distribution of students' amygdala volume")
plt.xlabel('volume bins')
plt.ylabel('counts')
plt.show()
import seaborn as sns
# 1-D KDE:
fig, ax = plt.subplots(figsize=(10, 6))
g_amygdala = sns.kdeplot(data=df["amygdala"])
ax.legend(['amygdala distribution density'])
# 1-D histogram:
SMALL_SIZE = 4
MEDIUM_SIZE = 6
BIGGER_SIZE = 15
plt.figure(figsize=(10, 6))
plt.rc('font', size=BIGGER_SIZE)
plt.rc('axes', titlesize=BIGGER_SIZE)
plt.rc('axes', labelsize=BIGGER_SIZE)
plt.rc('xtick', labelsize=BIGGER_SIZE)
plt.hist(df['acc'], edgecolor='black', bins=9)
# Add title and axis names
plt.title("Distribution of students' acc volume")
plt.xlabel('volume bins')
plt.ylabel('counts')
plt.show()
# 1-D KDE:
fig, ax = plt.subplots(figsize=(10, 6))
g_acc = sns.kdeplot(data=df["acc"])
ax.legend(['acc distribution density'])
n_bins=[4,6,8,10,12,14,20]
#fig = plt.figure(figsize=plt.figaspect(0.1))
fig = plt.figure(figsize=(40,60))
for i in range (len(n_bins)):
ax = fig.add_subplot(1, len(n_bins), i+1, projection='3d')
# Creating plot
hist, xedges, yedges = np.histogram2d(df['acc'].values, df["amygdala"].values, bins=n_bins[i])
# Construct arrays for the anchor positions of the 5 bars.
xpos, ypos = np.meshgrid(xedges[:-1] + xedges[1:], yedges[:-1] + yedges[1:])
xpos = xpos.flatten()/2.
ypos = ypos.flatten()/2.
zpos = np.zeros_like(xpos)
# Construct arrays with the dimensions for the 5 bars.
dx = dy = 0.5 * np.ones_like(zpos)
dz = hist.ravel()
dx = xedges [1] - xedges [0]
dy = yedges [1] - yedges [0]
dz = hist.flatten()
ax.bar3d(xpos, ypos, zpos, dx, dy, dz, edgecolor='black')
ax.set_xlabel('acc', labelpad=20)
ax.set_ylabel('amygdala', labelpad=20)
ax.set_zlabel('Occurrence', labelpad=20)
plt.locator_params(axis='y', nbins=4)
plt.locator_params(axis='x', nbins=4)
plt.locator_params(axis='z', nbins=4)
ax.title.set_text("number of bins=%i" %(n_bins[i]))
from sklearn.neighbors import KernelDensity
from sklearn.model_selection import GridSearchCV
def get_2d_grid(x, y):
# create grid of sample locations (default: 100x100)
# Define the borders
deltaX = (max(x) - min(x))/10
deltaY = (max(y) - min(y))/10
xmin = min(x) - deltaX
xmax = max(x) + deltaX
ymin = min(y) - deltaY
ymax = max(y) + deltaY
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
return xmin, xmax, ymin, ymax, xx, yy
grid = GridSearchCV(KernelDensity(kernel = 'gaussian'),{'bandwidth': np.linspace(0.00001, 0.5, 200)}, cv = 5)
x = df["amygdala"].values
y = df['acc'].values
xmin, xmax, ymin, ymax, xx, yy = get_2d_grid(x, y)
xy_sample = np.vstack([yy.ravel(), xx.ravel()]).T
xy_train = np.vstack([y, x]).T
grid.fit(xy_train)
print(grid.best_params_)
z = np.exp(grid.best_estimator_.score_samples(xy_sample))
kde = KernelDensity(kernel='gaussian', bandwidth=grid.best_params_['bandwidth']).fit(xy_train)
kde.fit(xy_train)
# score_samples() returns the log-likelihood of the samples
z = np.exp(kde.score_samples(xy_sample))
z = np.reshape(z, xx.shape)
fig, ax = plt.subplots(figsize=(10, 30))
# Contourf plot
cfset = ax.contourf(xx, yy, z)
ax.imshow(np.rot90(z), extent=[xmin, xmax, ymin, ymax])
cset = ax.contour(xx, yy, z, colors='k')
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('acc')
ax.set_ylabel('amygdala')
plt.show()
# 3d kde plot:
fig = plt.figure(figsize=(20, 14))
ax = plt.axes(projection='3d')
surf = ax.plot_surface(xx, yy, z, rstride=1, cstride=1, cmap='binary', edgecolor='black')
ax.set_xlabel('acc')
ax.set_ylabel('amygdala')
ax.set_zlabel('PDF')
ax.set_title('Surface plot of Gaussian 2D KDE')
fig.colorbar(surf, shrink=0.5, aspect=5) # add color bar indicating the PDF
ax.view_init(60, 65)
def get_1d_grid(x):
# create grid of sample locations (default: 100x100)
# Define the borders
deltaX = (max(x) - min(x))/10
xmin = min(x) - deltaX
xmax = max(x) + deltaX
xx = np.linspace(start=xmin, stop=xmax, num=100)
return xx
# p(amygdala) × p(acc)
kde_acc = KernelDensity(bandwidth=0.01).fit(df["acc"].values[:, None])
dens_acc = np.exp(kde_acc.score_samples(get_1d_grid(df["acc"].values[:, None])))
kde_amygdala = KernelDensity(bandwidth=0.01).fit(df["amygdala"].values[:, None])
dens_amygdala = np.exp(kde_amygdala.score_samples(get_1d_grid(df["amygdala"].values[:, None])))
prod_kdes = np.outer(dens_acc, dens_amygdala)
# plot for p(amygdala) × p(acc)
fig, ax = plt.subplots(figsize=(10, 30))
# Contourf plot
cfset = ax.contourf(xx, yy, prod_kdes)
ax.imshow(np.rot90(prod_kdes), extent=[xmin, xmax, ymin, ymax])
cset = ax.contour(xx, yy, prod_kdes, colors='k')
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('acc')
ax.set_ylabel('amygdala')
plt.show()
# error map |p(amygdala, acc)−p(amygdala)p(acc)|:
error = np.abs(z - prod_kdes)
fig, ax = plt.subplots(figsize=(10, 30))
# Contourf plot
cfset = ax.contourf(xx, yy, error)
ax.imshow(np.rot90(error), extent=[xmin, xmax, ymin, ymax])
cset = ax.contour(xx, yy, error, colors='k')
ax.clabel(cset, inline=1, fontsize=10)
ax.set_xlabel('acc')
ax.set_ylabel('amygdala')
plt.show()
def get_plots(axs, ind_x, ind_y, i, dat, var='amygdala'):
sns.kdeplot(data=dat[dat['orientation'] == i][var].values, ax=axs[ind_x, ind_y])
axs[ind_x, ind_y].set_title('p({} | orientation = {})'.format(var, i))
fig, axs = plt.subplots(2, 2, gridspec_kw={'wspace':0, 'hspace':0},
squeeze=True, figsize=(10, 10))
get_plots(axs, 0, 0, 2, df, var='amygdala')
get_plots(axs, 0, 1, 3, df, var='amygdala')
get_plots(axs, 1, 0, 4, df, var='amygdala')
get_plots(axs, 1, 1, 5, df, var='amygdala')
plt.show()
def get_plots(axs, ind_x, ind_y, i, dat, var='acc'):
sns.kdeplot(data=dat[dat['orientation'] == i][var].values, ax=axs[ind_x, ind_y])
axs[ind_x, ind_y].set_title('p({} | orientation = {})'.format(var, i))
fig, axs = plt.subplots(2, 2, gridspec_kw={'wspace':0, 'hspace':0},
squeeze=True, figsize=(10, 10))
get_plots(axs, 0, 0, 2, df, var='acc')
get_plots(axs, 0, 1, 3, df, var='acc')
get_plots(axs, 1, 0, 4, df, var='acc')
get_plots(axs, 1, 1, 5, df, var='acc')
plt.show()
dat[dat['orientation'] == 2][acc].values
df[df['orientation'] == 2]['acc'].values
#### means calculation:
mean ={}
for c in [2,3,4,5]:
for attribute in ['amygdala','acc']:
temp=df[df['orientation'] == c][attribute].values
mean_temp=round(np.mean(temp),3)
mean [attribute,c] = mean_temp
#print ("MEAN {}'s values with oreintation:{} is {}". format(attribute, c, mean_temp ))
mean
| Attribute's Mean | C=2 | C=3 | C=4 | C=5 |
|---|---|---|---|---|
| amygdala | 0.019 | 0.001 | -0.005 | -0.006 |
| acc | -0.015 | 0.002 | 0.001 | 0.008 |
def get_3d_plots(axs, ind_x, ind_y, i, dat):
x = df["amygdala"].values
y = df['acc'].values
xmin, xmax, ymin, ymax, _ , _ = get_2d_grid(x, y)
x = df[df['orientation'] == i]["amygdala"].values
y = df[df['orientation'] == i]['acc'].values
_, _, _, _, xx , yy = get_2d_grid(x, y)
xy_sample = np.vstack([yy.ravel(), xx.ravel()]).T
xy_train = np.vstack([y, x]).T
kde = KernelDensity(kernel='gaussian', bandwidth=grid.best_params_['bandwidth']).fit(xy_train)
z = np.exp(kde.score_samples(xy_sample))
z = np.reshape(z, xx.shape)
cfset = axs[ind_x, ind_y].contourf(xx, yy, z)
cset = axs[ind_x, ind_y].contour(xx, yy, z, colors='k')
axs[ind_x, ind_y].clabel(cset, inline=1, fontsize=10)
axs[ind_x, ind_y].set_xlabel('acc')
axs[ind_x, ind_y].set_ylabel('amygdala')
axs[ind_x, ind_y].set_title('p(amygdala, acc | orientation = {})'.format(i))
fig, axs = plt.subplots(2, 2, gridspec_kw={'wspace':0.2, 'hspace':0.2},
figsize=(15, 15))
get_3d_plots(axs, 0, 0, 2, df)
get_3d_plots(axs, 0, 1, 3, df)
get_3d_plots(axs, 1, 0, 4, df)
get_3d_plots(axs, 1, 1, 5, df)
plt.show()